# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627

# DEEP LEARNING

# Import necessary libraries
! pip install pandas;
! pip install numpy;
! pip install scikit-learn;
! pip install matplotlib;
! pip install seaborn;
! pip install tensorflow;
! pip install ISLP;

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

from tensorflow import keras
from tensorflow.keras import layers

from ISLP import load_data

# Fetch the USArrests table from the Rdatasets mirror; the first CSV column
# becomes the row index
url = "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/USArrests.csv"
USArrests = pd.read_csv(filepath_or_buffer=url, index_col=0)

# Preview the top rows to confirm the table loaded as expected
first_rows = USArrests.head()
print(first_rows)

            Murder  Assault  UrbanPop  Rape
rownames                                   
Alabama       13.2      236        58  21.2
Alaska        10.0      263        48  44.5
Arizona        8.1      294        80  31.0
Arkansas       8.8      190        50  19.5
California     9.0      276        91  40.6

# Keep the labels needed for the plot, then standardize every column
features = USArrests.columns  # feature names, used to label the loading arrows
state_names = USArrests.index  # row labels of USArrests (the state names)
X = StandardScaler().fit_transform(USArrests)

# Project the standardized data onto its first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Scatter the observations in the plane of the two components
plt.figure(figsize=(12, 9))
plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title("Biplot with State Labels and Enhanced Arrows")

# Annotate every point with its state name
for (pc1, pc2), state in zip(X_pca, state_names):
    plt.text(pc1, pc2, state, ha='right', color='blue', fontsize=8)

# Overlay one arrow per feature, pointing along its loadings on the two components
arrow_scale = 2    # stretch factor for the arrows; raise or lower to resize them
label_scale = 2.2  # labels are placed slightly beyond the arrow tips
for feature, (loading_1, loading_2) in zip(features, pca.components_.T):
    plt.arrow(
        0, 0,
        loading_1 * arrow_scale,
        loading_2 * arrow_scale,
        color='red',
        width=0.02,       # thickness of the arrow shaft
        head_width=0.1,   # width of the arrowhead
    )
    plt.text(
        loading_1 * label_scale,
        loading_2 * label_scale,
        feature,
        color='red',
        ha='center',
        va='center',
    )

plt.grid()
plt.show()

from ISLP import load_data;

# Read the Auto dataset shipped with the ISLP package
Auto = load_data('Auto')

# Four predictors are used to model the response, mpg
feature_cols = ['weight', 'acceleration', 'horsepower', 'cylinders']
X = Auto[feature_cols]
y = Auto['mpg']

# Hold out half of the rows; random_state makes the split reproducible
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=1, test_size=0.5
)

# Learn the scaling from the training half only, then apply it to that half
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# 1. Artificial Neural Network with no hidden layers (linear regression)
# A single Dense unit without an activation outputs a weighted sum of the
# 4 inputs plus a bias, which is the functional form of linear regression.
model_no_hidden = keras.Sequential([
    layers.Input(shape=(4,)),  # 4 input features
    layers.Dense(1)  # No hidden layers: inputs feed the output directly
])

model_no_hidden.compile(optimizer='adam', loss='mean_squared_error')

# Train the model on the standardized training half
model_no_hidden.fit(X_train_scaled, y_train, epochs=100, verbose=0)

# Evaluate on the held-out half of the split. The held-out features must go
# through the scaler fitted on the training data so that both halves share
# the same scaling; the scaled copy is built here so this section does not
# rely on a variable defined elsewhere.
X_val_scaled = scaler.transform(X_val)
y_pred_no_hidden = model_no_hidden.predict(X_val_scaled)
mse_no_hidden = mean_squared_error(y_val, y_pred_no_hidden)

# NOTE(review): the MSE recorded in the notebook output (530.30) looks very
# large for this target -- possibly the network is still underfit after
# 100 epochs with default Adam settings; confirm and consider training longer.

# Print MSE
print(f'MSE for no hidden layer ANN: {mse_no_hidden:.2f}')

# Function to draw ANN
def draw_ann(weights, layer_sizes, title):
    """Draw a feed-forward network as a node-and-arrow diagram with weight labels.

    Layers are laid out left to right at x = 0, 1, 2, ...; node ``k`` of a
    layer is placed at y = ``k``.  Every connection is drawn as an arrow that
    runs from its source node to its destination node and is annotated with
    the corresponding weight, formatted to two decimals.

    Parameters
    ----------
    weights : sequence
        One entry per pair of consecutive layers.  Only ``entry[0]`` is read;
        it must be a 2-D array of shape
        ``(layer_sizes[i], layer_sizes[i + 1])`` whose element ``[k, j]`` is
        the weight from node ``k`` of layer ``i`` to node ``j`` of layer
        ``i + 1``.
    layer_sizes : sequence of int
        Number of nodes in each layer, input layer first.
    title : str
        Title shown above the diagram.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Vertical positions of the nodes in every layer (node k sits at y = k)
    layers_pos = [np.arange(size, dtype=float) for size in layer_sizes]

    # Draw nodes for each layer
    for i, node_ys in enumerate(layers_pos):
        ax.scatter(np.full(node_ys.shape, i), node_ys, s=200,
                   color='lightblue', edgecolor='black', zorder=5)

    # Draw arrows and display weights
    for i, layer_weights in enumerate(weights):
        weight_matrix = layer_weights[0]  # First element is the weight matrix
        n_in, n_out = weight_matrix.shape
        for j in range(n_out):  # Iterate through output nodes
            # Place labels at a different fraction of the edge for each
            # destination node so labels of crossing edges overlap less.
            frac = (j + 1) / (n_out + 1)
            for k in range(n_in):  # Iterate through input nodes
                y_start = layers_pos[i][k]
                rise = layers_pos[i + 1][j] - y_start
                # The arrow spans the full distance to the destination node;
                # length_includes_head keeps the tip from overshooting it.
                ax.arrow(i, y_start, 1, rise,
                         length_includes_head=True,
                         head_width=0.1, head_length=0.1,
                         fc='k', ec='k', zorder=4)
                # Weight label sits on the edge, above the arrows, on a
                # white patch so the number stays readable.
                ax.text(i + frac, y_start + frac * rise,
                        f'{weight_matrix[k, j]:.2f}',
                        fontsize=12, ha='center', va='center', zorder=6,
                        bbox=dict(facecolor='white', edgecolor='none', pad=1))

    ax.set_title(title)
    ax.set_xticks(np.arange(len(layer_sizes)))
    ax.set_xticklabels([f'Layer {i + 1}' for i in range(len(layer_sizes))])
    ax.set_yticks([])
    # Size the y-axis to the tallest layer (not the last one) so that no
    # node is clipped; the top node of that layer sits at max(layer_sizes) - 1.
    ax.set_ylim(-1, max(layer_sizes))
    plt.show()

# Visualize the fitted network without hidden layers: 4 inputs, 1 output
first_layer = model_no_hidden.layers[0]
weights_no_hidden = first_layer.get_weights()
draw_ann(
    weights=[weights_no_hidden],
    layer_sizes=[4, 1],
    title='ANN with No Hidden Layers',
)

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
MSE for no hidden layer ANN: 530.30

# 2. ANN with 3 hidden nodes
# One hidden layer of 3 ReLU units between the 4 inputs and the single output.
model_3_hidden = keras.Sequential([
    layers.Input(shape=(4,)),  # 4 input features
    layers.Dense(3, activation='relu'),  # 3 hidden nodes
    layers.Dense(1)  # Output layer
])

model_3_hidden.compile(optimizer='adam', loss='mean_squared_error')

# Train the model on the standardized training half
model_3_hidden.fit(X_train_scaled, y_train, epochs=100, verbose=0)

# Evaluate on the held-out half of the split. The held-out features must go
# through the scaler fitted on the training data so that both halves share
# the same scaling; the scaled copy is built here so this section does not
# rely on a variable defined elsewhere.
X_val_scaled = scaler.transform(X_val)
y_pred_3_hidden = model_3_hidden.predict(X_val_scaled)
mse_3_hidden = mean_squared_error(y_val, y_pred_3_hidden)

# Print MSE
print(f'MSE for 3 hidden nodes ANN: {mse_3_hidden:.2f}')

# Draw the ANN with 3 hidden nodes
# Collect get_weights() from the first two layers: input -> hidden, then
# hidden -> output.
weights_3_hidden = [layer.get_weights() for layer in model_3_hidden.layers[:2]]
draw_ann(weights_3_hidden, [4, 3, 1], 'ANN with 3 Hidden Nodes')

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
MSE for 3 hidden nodes ANN: 315.71

# 3. Multilayer structure with 3 and 2 hidden nodes
# Two hidden ReLU layers (3 nodes, then 2 nodes) between the 4 inputs and
# the single output.
model_3_2_hidden = keras.Sequential([
    layers.Input(shape=(4,)),  # 4 input features
    layers.Dense(3, activation='relu'),  # 3 hidden nodes
    layers.Dense(2, activation='relu'),  # 2 hidden nodes
    layers.Dense(1)  # Output layer
])

model_3_2_hidden.compile(optimizer='adam', loss='mean_squared_error')

# Train the model on the standardized training half
model_3_2_hidden.fit(X_train_scaled, y_train, epochs=100, verbose=0)

# Evaluate on the held-out half of the split. The held-out features must go
# through the scaler fitted on the training data so that both halves share
# the same scaling; the scaled copy is built here so this section does not
# rely on a variable defined elsewhere.
X_val_scaled = scaler.transform(X_val)
y_pred_3_2_hidden = model_3_2_hidden.predict(X_val_scaled)
mse_3_2_hidden = mean_squared_error(y_val, y_pred_3_2_hidden)

# Print MSE
print(f'MSE for 3-2 hidden nodes ANN: {mse_3_2_hidden:.2f}')

# Draw the ANN with 3 and 2 hidden nodes
# Collect get_weights() from the first three layers: input -> first hidden,
# first hidden -> second hidden, second hidden -> output.
weights_3_2_hidden = [layer.get_weights() for layer in model_3_2_hidden.layers[:3]]
draw_ann(weights_3_2_hidden, [4, 3, 2, 1], 'ANN with 3 and 2 Hidden Nodes')

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
MSE for 3-2 hidden nodes ANN: 229.91